package com.foreveross.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;

/**
 * Static helpers for pulling basic information (base URI, declared charset,
 * visible text) out of a parsed jsoup {@code Document}.
 */
public class ParseUtil {

	/** Charset reported when the document is null or declares none. */
	private static final String DEFAULT_CHARSET = "utf-8";

	/**
	 * Matches a {@code charset=...} declaration and captures only the charset
	 * token. An optional opening quote is skipped so that both
	 * {@code <meta charset="gbk">} and
	 * {@code content="text/html; charset=gbk"} yield {@code gbk}. The token
	 * ends at the first quote, whitespace, {@code ;}, {@code /} or {@code >}.
	 */
	private static final Pattern CHARSET_PATTERN = Pattern.compile(
			"charset\\s*=\\s*[\"']?([^\"'\\s;/>]+)", Pattern.CASE_INSENSITIVE);

	/**
	 * Returns the base URI of the document.
	 *
	 * @param doc the parsed document; may be {@code null}
	 * @return the document's base URI, or {@code null} when {@code doc} is
	 *         {@code null}
	 */
	public static String getBaseUri(org.jsoup.nodes.Document doc) {
		if (null == doc) {
			return null;
		}
		return doc.baseUri();
	}

	/**
	 * Extracts the charset declared in the document's serialized HTML.
	 *
	 * <p>The whole serialized document is searched for the first
	 * {@code charset=} declaration; the value is returned without surrounding
	 * quotes.
	 *
	 * @param doc the parsed document; may be {@code null}
	 * @return the declared charset name, or {@code "utf-8"} when {@code doc}
	 *         is {@code null} or no declaration is found
	 */
	public static String getCharset(org.jsoup.nodes.Document doc) {
		if (null == doc) {
			return DEFAULT_CHARSET;
		}
		Matcher matcher = CHARSET_PATTERN.matcher(doc.toString());
		// group(1) is the bare charset token, without any quote characters.
		return matcher.find() ? matcher.group(1) : DEFAULT_CHARSET;
	}

	/**
	 * Returns the text of the document body with all markup removed.
	 *
	 * <p>The document is serialized and re-parsed before the body text is
	 * read.
	 *
	 * @param doc     the parsed document; may be {@code null}
	 * @param charset forwarded as the second argument of
	 *                {@code Jsoup.parse(String, String)}
	 * @return the body text, or {@code null} when {@code doc} is {@code null}
	 */
	public static String getContent(org.jsoup.nodes.Document doc, String charset) {
		if (null == doc) {
			return null;
		}
		// NOTE(review): the second parameter of Jsoup.parse(String, String) is
		// the base URI, not a character set, so the value passed here does not
		// affect how the text is decoded. Kept as-is for caller compatibility.
		return Jsoup.parse(doc.toString(), charset).body().text();
	}
}
